## Figure 4: Distribution of Intermediate Goods Exporters

## NOTE: this clears every object in the calling workspace, so run the script
## in a fresh R session.
rm(list = ls())

## library -----------------------------------------------------------
library(ggplot2)
library(gridExtra)
library(dplyr)

## setup -------------------------------------------------------------
## All inputs (the CSV files and util.R) are read relative to the current
## working directory, so launch the script from the folder that holds them.
## (The former setwd(getwd()) call was a no-op and has been dropped.)

## check/create the folder to put outputs
FIG_DIR <- file.path(getwd(), "figs")
if (!dir.exists(FIG_DIR)) {
    dir.create(FIG_DIR)
}

## helper functions; getInfo(), getHS_6() and getHS_10() used further down are
## not defined in this file and are presumably provided here -- TODO confirm
source("util.R")

## -------------------------------------------------------------------
## firms responded twice: response ids (survey column V1) to be dropped
resptwice <- c("R_3lKP5vVopq27Q5T", "R_ewwjPKYf1mQtVkx",
               "R_8wVpt534HFyvbG5","R_7QwtntdSZ7ONCaV",
               "R_787WpeOkIatHWkZ", "R_8umeWuf3qISiJcV",
               "R_85MeGXniJgeyA6h", "R_0Sy2CDRAw2ddHql",
               "R_5pDS87UuBf5ekU5","R_9YwTFMe01hCQIDP",
               "R_38WL2rp1ocK9c1v","R_0qEMbRZcn0W3gMd",
               "R_3UdWL2nJMFa61hP","R_ctKLc9LF3hANh4x")

## read survey data
survey <- read.csv("crsurvey_full.csv")

## Drop the listed responses with a logical mask.  Negative indexing on the
## result of which() would silently remove EVERY row if none of the ids were
## present, because x[-integer(0), ] selects nothing.
survey <- survey[!(survey$V1 %in% resptwice), ]

## response id paired with the firm code reported in the survey
id <- data.frame(id=survey$V1, proccode=survey$proccode)

## reading procomer data
## scipen = 999 keeps numbers out of scientific notation when they are turned
## into text (the hs10 codes go through such a conversion just below)
options(scipen = 999)
procomer <- read.csv("procomerALL.csv")

## strip the leading "p" from each product code and parse it as a number
hs_numeric <- as.numeric(sub("^p", "", procomer$hs10))

## fixing 9 digit cases: back to text, then prepend "0" to every code that is
## exactly 9 characters long (presumably a leading zero lost in the numeric
## conversion -- TODO confirm)
hs_text <- as.character(hs_numeric)
short <- which(nchar(hs_text) == 9)
hs_text[short] <- paste0("0", hs_text[short])
procomer$hs10 <- hs_text

## reading concordance
## Both files are read with every column as text; only columns 1 and 3 are
## kept and renamed.

## hs code -> sitc code
hs2sitc <- setNames(
    read.csv("Concordance_HS_to_S2.CSV", colClasses="character")[, c(1, 3)],
    c("hs", "sitc")
)

## sitc code -> bec code
sitc2bec <- setNames(
    read.csv("Concordance_S2_to_BE.CSV", colClasses="character")[, c(1, 3)],
    c("sitc", "bec")
)

## chain the two tables on their shared sitc column (rows without a match on
## both sides are dropped, merge()'s default)
hs2bec <- merge(hs2sitc, sitc2bec, by = "sitc")

## BEC codes grouped by end use
## capital goods
capital <- c("41", "521")
## intermediate goods
intm <- c("111", "121", "21", "22", "31", "322", "42", "53")
## consumption goods
consume <- c("112", "122", "522", "61", "62", "63")

## find procomer firms: the distinct, non-empty proccode values in the survey
proc_codes <- unique(as.character(id$proccode))
ids <- proc_codes[which(proc_codes != "")]

## the combined entry "E2616; E4532" is reduced to its first code
## NOTE(review): id$proccode still holds the combined string, so a later
## merge on this code finds no survey row for it -- confirm this is intended
ids <- replace(ids, ids == "E2616; E4532", "E2616")
## info data
## Collect getInfo()'s result for every firm code in `ids` and stack the
## pieces into one table, infoD.  getInfo() is defined outside this file; the
## stacking assumes it returns a data frame -- TODO confirm.
##
## The pieces are gathered in a pre-sized list and bound once, instead of
## growing infoD with rbind() on every pass.  The old in-loop check for
## "E2616; E4532" is gone: that value is already replaced in `ids` before the
## loop, so the branch could never run.
info_rows <- vector("list", length(ids))
for (i in seq_along(ids)) {
    print(i)
    ## assign via list() so that a NULL result cannot shorten the list
    info_rows[i] <- list(getInfo(ids[i]))
}

## starting from an empty data frame keeps infoD a data frame even when
## `ids` is empty
infoD <- do.call(rbind, c(list(data.frame()), info_rows))

## intermediate goods
## One row per firm code with empty share columns, joined to the survey
## response id(s) that reported that code.  all.x keeps firm codes that have
## no matching survey row (their id is NA).
D <- merge(
    data.frame(procomerID = ids,
               capital = NA, intermediate = NA, consumption = NA),
    id,
    by.x = "procomerID", by.y = "proccode", all.x = TRUE
)

## keep only rows whose response id is not in the responded-twice list
D <- D[!(D$id %in% resptwice), ]

D$procomerID <- as.character(D$procomerID)
## from here on `ids` follows the row order of D
ids <- D$procomerID

## placeholders for the top-product indicators
D$int_dummy <- rep(NA, nrow(D))
D$final_dummy <- rep(NA, nrow(D))



## Classify each responding firm's exports by BEC end use.
##
## For the firm in row i of D (firm codes taken from `ids`) the loop fills:
##   int_dummy / final_dummy : 1 if the firm's largest export product is an
##                             intermediate / consumption good, else 0.  When
##                             the largest product is neither, the second
##                             largest product is used if it is one of the two.
##   capital / intermediate / consumption :
##                             share of the firm's total export value that
##                             falls in each BEC group.
##
## hs6 selects the product table: TRUE uses getHS_6(), FALSE uses getHS_10()
## with codes cut to their first six characters.  Both helpers are defined
## outside this file; the code relies on them returning a table with a product
## code column (hs6 / hs10), a `sum` column, and the export value in column 2
## (presumably that same `sum` column -- TODO confirm).
##
## infoD is deliberately not touched here.  It is already complete after the
## earlier getInfo() loop; appending to it again listed every firm twice.
hs6 <- TRUE

## BEC code of one HS6 product: the first concordance row for that code, or NA
## when the code is NA or has no concordance entry.
lookup_bec <- function(code) {
    hs2bec$bec[which(hs2bec$hs == code)[1]]
}

for (i in seq_along(ids)) {
    print(i)
    id.i <- ids[i]

    if (hs6) {
        resp_hs <- getHS_6(id.i)
        hs_codes <- as.character(resp_hs$hs6)
    } else {
        resp_hs <- getHS_10(id.i)
        hs_codes <- substring(as.character(resp_hs$hs10), 1, 6)
    }

    ## Rank products by export value, largest first.  order() is stable, so
    ## tied products keep their row order and the runner-up is always a
    ## DIFFERENT row from the top product (matching on the value itself picked
    ## the top product again whenever two products tied for first place).
    ## Products with an NA value rank last.
    ranked <- order(-as.numeric(resp_hs$sum))

    top.bec <- lookup_bec(hs_codes[ranked[1]])
    is_int <- as.numeric(top.bec %in% intm)
    is_final <- as.numeric(top.bec %in% consume)

    ## Top product is neither intermediate nor consumption (capital, another
    ## BEC code, or not in the concordance): fall back to the second product.
    ## With a single product ranked[2] is NA and nothing changes.
    if (is_int == 0 && is_final == 0) {
        second.bec <- lookup_bec(hs_codes[ranked[2]])
        is_int2 <- as.numeric(second.bec %in% intm)
        is_final2 <- as.numeric(second.bec %in% consume)

        if (is_int2 == 1 || is_final2 == 1) {
            is_int <- is_int2
            is_final <- is_final2
            cat("found from second\n")
        }
    }

    D$int_dummy[i] <- is_int
    D$final_dummy[i] <- is_final

    ## Export value per BEC group.  A product counts only when the concordance
    ## has exactly one row for it; products with several rows are reported on
    ## the console and skipped, products with none are skipped silently.
    weights <- as.numeric(resp_hs[[2]])
    group_value <- c(capital = 0, intermediate = 0, consumption = 0)

    for (j in seq_along(hs_codes)) {
        hits <- which(hs2bec$hs == hs_codes[j])

        if (length(hits) == 1) {
            bec.j <- hs2bec$bec[hits]

            if (bec.j %in% capital) {
                group_value["capital"] <- group_value["capital"] + weights[j]
            }
            if (bec.j %in% intm) {
                group_value["intermediate"] <-
                    group_value["intermediate"] + weights[j]
            }
            if (bec.j %in% consume) {
                group_value["consumption"] <-
                    group_value["consumption"] + weights[j]
            }
        } else if (length(hits) > 1) {
            cat(j, "***", length(hits), "\n")
        }
    }

    ## shares are relative to ALL of the firm's exports, including products
    ## that were skipped above, so the three shares need not add up to 1
    total.w <- sum(weights)
    D$capital[i] <- group_value[["capital"]] / total.w
    D$intermediate[i] <- group_value[["intermediate"]] / total.w
    D$consumption[i] <- group_value[["consumption"]] / total.w
}

## procomer firms' intermediate goods distribution as a comparison
##
## For every firm id in the 2011 rows of `procomer`, compute the share of its
## export value in capital, intermediate and consumption goods.  Results go
## into P.capital, P.intermediate and P.consumption, one element per firm, in
## the order of procomeridAll.
##
## NOTE(review): `procomer` itself is overwritten with its 2011 subset here.
## getHS_10() is defined outside this file and takes only the firm id, so it
## presumably reads the global `procomer` and is affected by this filter --
## confirm in util.R.
procomer <- filter(procomer, year==2011)
procomeridAll <- as.character(unique(procomer$procomerID))
n_firms <- length(procomeridAll)
P.capital <- rep(NA, n_firms)
P.intermediate <- rep(NA, n_firms)
P.consumption <- rep(NA, n_firms)

for (i in seq_along(procomeridAll)) {
    print(i)
    id.i <- procomeridAll[i]
    if (id.i == "E2616; E4532") {
        id.i <- "E2616"
    }

    ## product table for the firm: hs10 codes, export value in column 2
    resp_hs <- getHS_10(id.i)
    hs_codes <- substring(resp_hs$hs10, 1, 6)
    weights <- as.numeric(resp_hs[[2]])

    ## A product counts only when the concordance has exactly one row for its
    ## HS6 code; products with several rows are reported on the console and
    ## skipped, products with none are skipped silently.
    group_value <- c(capital = 0, intermediate = 0, consumption = 0)

    for (j in seq_along(hs_codes)) {
        hits <- which(hs2bec$hs == hs_codes[j])

        if (length(hits) == 1) {
            bec.j <- hs2bec$bec[hits]

            if (bec.j %in% capital) {
                group_value["capital"] <- group_value["capital"] + weights[j]
            }
            if (bec.j %in% intm) {
                group_value["intermediate"] <-
                    group_value["intermediate"] + weights[j]
            }
            if (bec.j %in% consume) {
                group_value["consumption"] <-
                    group_value["consumption"] + weights[j]
            }
        } else if (length(hits) > 1) {
            cat(j, "***", length(hits), "\n")
        }
    }

    ## shares are relative to all of the firm's exports, skipped ones included
    total.w <- sum(weights)
    P.capital[i] <- group_value[["capital"]] / total.w
    P.intermediate[i] <- group_value[["intermediate"]] / total.w
    P.consumption[i] <- group_value[["consumption"]] / total.w
}


## Lookup table: BEC code -> end-use class.  Each label is repeated by the
## length of its code vector, so the table stays aligned if a vector changes.
class <- data.frame(
    bec = c(capital, intm, consume),
    class = rep(c("capital", "intermediate", "consumption"),
                times = c(length(capital), length(intm), length(consume))),
    stringsAsFactors = FALSE
)

## Export value by end-use class for every firm in `procomer`, using each
## firm's most recent year on record.
##
## Result: df, one row per firm, with columns procomerID, consumption,
## intermediate and capital.  A class in which the firm exports nothing is 0.
##
## Fixes relative to the previous version of this loop:
##   * every record gets its OWN BEC code (the whole bec column used to be
##     overwritten on each pass, leaving all products with the last code);
##   * the row is labelled with the firm being processed (id.f), not with a
##     leftover id from an earlier loop;
##   * class totals are looked up by name rather than by row position, which
##     did not match the order the totals were produced in.
ids <- as.character(unique(procomer$procomerID))
end_uses <- c("consumption", "intermediate", "capital")
firm_rows <- vector("list", length(ids))

for (f in seq_along(ids)) {
    print(f)
    id.f <- ids[f]

    firm <- filter(procomer, procomerID == id.f)
    year.recent <- max(as.numeric(as.character(firm$year)))
    recent <- filter(firm, year == year.recent)

    ## per record: HS6 code -> BEC code (first concordance row) -> class;
    ## records without a concordance entry or class end up NA and are not
    ## counted in any total
    hs6_codes <- substring(recent$hs10, 1, 6)
    bec_codes <- hs2bec$bec[match(hs6_codes, hs2bec$hs)]
    end_use <- class$class[match(bec_codes, class$bec)]
    export_value <- as.numeric(recent[["value"]])

    ## sum() over an empty selection is 0, which fills in missing classes
    totals <- vapply(
        end_uses,
        function(k) sum(export_value[end_use %in% k], na.rm = TRUE),
        numeric(1)
    )

    firm_rows[[f]] <- data.frame(
        procomerID = id.f,
        consumption = totals[["consumption"]],
        intermediate = totals[["intermediate"]],
        capital = totals[["capital"]],
        stringsAsFactors = FALSE
    )
}

## bind once at the end instead of growing df inside the loop
df <- bind_rows(firm_rows)


## survey data
## Re-read the full survey and add two indicators:
##   foreign_own  is 1 when own_3 equals 100, otherwise 0
##   domestic_own is 1 when own_1 equals 100, otherwise 0
survey <- read.csv("./crsurvey_full.csv")
survey$foreign_own <- ifelse(survey$own_3 == 100, 1, 0)
survey$domestic_own <- ifelse(survey$own_1 == 100, 1, 0)

## keep the responses whose V1 appears in D$id, rename the first column to
## "id" (assumes V1 is the first column of the file -- TODO confirm), and
## attach the survey answers to D, keeping every row of D
in_D <- survey$V1 %in% D$id
survey.sub <- survey[in_D, ]
names(survey.sub)[1] <- "id"
D2 <- merge(D, survey.sub, by = "id", all.x = TRUE)


## Figure 4: histogram of D2$intermediate (labelled "Respondents" in the
## figure) with the kernel density of P.intermediate (labelled "All Costa
## Rican Exporters") drawn over it.  Written to figs/figure4.pdf.
pdf(file.path(FIG_DIR, "figure4.pdf"),
    width=12,height=10) # for paper

par(cex.main=2, cex.lab=2, cex.axis=1.3,
    mar=c(6,6,4,4))

## xlim is fixed to [0, 1] so the histogram uses the same x-axis as the
## density curve overlaid below, whatever the range of the data
hist(D2$intermediate, breaks=15,
     xlim=c(0,1),
     xlab="", col="lightgray",
     ylab="",
     main="")

mtext(side=1, "Proportion of Intermediate Goods Out of Total Exports",
      cex=2.5, line=3.5)

mtext(side=2, "Frequency",
      cex=2.7, line=3)

## annotations are placed in the histogram's coordinates (x = share,
## y = frequency), so they must be drawn before the overlay resets the scale
arrows(0.4, 70, 0.15, 40)
text(0.4, 75, "Distribution from\nAll Costa Rican Exporters",
     cex=2)

arrows(0.6, 45, 0.95, 20)
text(0.6, 50, "Distribution from\nRespondents (Exporters)",
     cex=2)

## overlay: a second plot on the same page with its own y scale and no axes
par(new=TRUE)
## na.rm drops firms whose share is NA/NaN (e.g. a 0/0 share); without it
## density() stops with an error and no figure is produced
d <- density(P.intermediate, na.rm=TRUE) # returns the density data
plot(d,
     yaxt="n", xaxt="n",
     ylab="", xlab="", main="", lwd=3,
     xlim=c(0,1))

dev.off()

